#!/usr/bin/env bash
#
# Launch a vLLM server for a local Qwen2.5-72B-Instruct GPTQ-Int4 checkpoint,
# sharded across four GPUs with tensor parallelism and listening on port 8989.
#
# Usage: execute this file (e.g. `bash <this-file>`). Do not `source` it:
# it enables strict mode and replaces the calling shell with the server.
#
# Exit status:
#   127  the `vllm` executable is not on PATH
#   1    the model directory does not exist
#   else whatever `vllm serve` exits with

set -euo pipefail

# Enumerate GPUs by PCI bus ID so the indices below are stable across runs.
export CUDA_DEVICE_ORDER=PCI_BUS_ID
# GPUs exposed to the server. The number of entries here must equal the
# value passed to --tensor-parallel-size below.
export CUDA_VISIBLE_DEVICES=1,5,6,7
# Hugging Face Hub offline mode: the model is loaded from MODEL_DIR only.
export HF_HUB_OFFLINE=1

readonly MODEL_DIR=/file/ljw22/Qwen2.5-72B-Instruct-GPTQ-Int4

# Fail fast with a clear message instead of a late error from inside vLLM.
if ! command -v vllm >/dev/null 2>&1; then
  printf 'error: vllm executable not found on PATH\n' >&2
  exit 127
fi
if [[ ! -d "$MODEL_DIR" ]]; then
  printf 'error: model directory not found: %s\n' "$MODEL_DIR" >&2
  exit 1
fi

# exec so the server replaces this shell: signals sent to the script's PID
# (SIGTERM from a supervisor, Ctrl-C) reach vLLM directly and no idle parent
# shell is left behind.
exec vllm serve "$MODEL_DIR" \
  --served-model-name qwen2.5-72b-instruct \
  --enable-auto-tool-choice \
  --tool-call-parser hermes \
  --max-model-len 32768 \
  --tensor-parallel-size 4 \
  --port 8989
